##### Step 1: load data and packages #####




library(dplyr)
library(magrittr)
library(readr)
library(haven)
library(stringr)
library(ggplot2)
library(data.table)
library(lfe)
library(fixest)

base_dir <- "..." # Manually change home folder 
data_dir <- paste0(base_dir, "data/")

## function to more easily specify controls in regression formulas
fixest_form <- function(
    y,
    x, 
    fe="0",
    x_re = FALSE,
    df = NULL
) {
  
  if (x_re & !is.null(df)) {
    x <- x %>% map(~ grep(., colnames(df), value=T)) %>% reduce(c)
  }
  
  xpart <- paste(x, collapse = " + ")
  fepart <- paste(fe, collapse = " + ")
  
  
  
  y %>% 
    paste(paste(xpart, fepart, sep="|"), sep=" ~ ") %>%
    as.formula
  
}


## setup for tables
setFixest_dict(c(zip="Zip",
                 district="District",
                 pos_lin_fnc = "FNC Channel Pos.",
                 # pos_lin_msnbc = "MSNBC Channel Pos.",
                 # tea_party_share="Tea Party Cand. Vote Share",
                 state = "State",
                 `as.integer(is_tea_party)` = "Tea Party Entry",
                 fox.ave.pos.popcount = "Weighted Ave. FNC Channel Pos.",
                 fox.density.popcount = "FNC Channel Density"
)
)


setFixest_etable(fitstat = ~ n + r2)

tablestyle  = style.tex(main="aer",
                        fixef.suffix = " FEs",
                        fixef.where ="var",
                        fixef.title = "",
                        stats.title = "\\midrule",
                        tablefoot=FALSE,
                        yesNo="\\checkmark")

# Zip-code level Fox News exposure from Martin and Yorukoglu (2017) -----
load(paste0(data_dir, "zip_2sls_data.RData"))

chpos <- readRDS(paste0(data_dir, "cable_zip.rds"))
load(paste0(data_dir, "zip_demographics_2010.RData"))

census2010 %>% setDT
chpos <- census2010[chpos, on = .(zipcode)]
chpos[,zip:=as.integer(zipcode)]

# ZTCA data for the 111th Congress from Arceneaux et al. (2020)
zcta <- read_dta(paste0(data_dir, "ZCTA_CD_111.dta"))

# Tea Party candidates
tea.party.cands <- read.csv(paste0(data_dir, "tea-party-candidates-with-time-varying-affiliation2.csv"))

# congressional-district level demographics (note that 1st rows are labels)
ds172 <- read.csv(paste0(data_dir, "nhgis0017_ds172_2010_cd110th-112th.csv"), colClasses="character")
ds172 <- ds172[2:nrow(ds172), ]
ds175 <- read.csv(paste0(data_dir, "nhgis0017_ds175_2010_cd110th-112th.csv"), colClasses="character")
ds175 <- ds175[2:nrow(ds175), ]
ds195 <- read.csv(paste0(data_dir, "nhgis0017_ds195_20095_2009_cd110th-112th.csv"), colClasses="character")
ds195 <- ds195[2:nrow(ds195), ]





##### Step 2: obtain district-level measure of Fox News exposure as well as channel controls #####



### Calculate each zip code's share of population in its assigned media market ###

temp <- zip_vote %>% group_by(zip) %>% summarize(
  stid = first(stid)
) %>% ungroup()
chpos %<>% left_join(temp, by="zip")

chpos %<>% filter(position_year==2010)
zipcode_popshare_stid <- 
  chpos  %>% group_by(stid.y, position_year) %>% mutate(
    totpop.stid = sum(totpop, na.rm=TRUE)
  ) %>% ungroup() %>% mutate(
    popshare.stid = totpop/totpop.stid
  ) %>% dplyr::select(stid = stid.y, position_year, zipcode, popshare.stid)
zipcode_popshare_stid %<>% filter(!is.na(stid))

stopifnot(length(unique(zipcode_popshare_stid$zipcode))==nrow(zipcode_popshare_stid))
zcta$ZIPcode %<>% as.character()
zipcode_popshare_stid$zipcode %<>% as.character()
zcta %<>% 
  left_join(zipcode_popshare_stid[, c("zipcode", "popshare.stid")], 
            by=c("ZIPcode"="zipcode")) 




### Obtain cable system characteristics by congressional district ###

chpos$zipcode %<>% as.character()
zcta %<>%
  left_join(chpos[, c("zipcode", "pos_lin_fnc", "num_bc", "num_channels")],
            by=c("ZIPcode"="zipcode"))



### Summarize district-level exposure to Fox News and cable system characteristics by population weights ###

zcta_summ <- 
  zcta %>% group_by(StateAbb, CD111) %>% summarize(
    
    fox.density.popcount = sum(CDtoZIPfactor*!is.na(pos_lin_fnc), na.rm=TRUE),
    fox.ave.pos.popcount = sum(CDtoZIPfactor*pos_lin_fnc, na.rm=TRUE),
    num.channels.popcount = sum(CDtoZIPfactor*num_channels, na.rm=TRUE),
    num.bc.popcount = sum(CDtoZIPfactor*num_bc, na.rm=TRUE)
    
  ) %>% ungroup()





##### Step 3: add district-level demographic controls #####



data(state)
temp <- cbind(state.name, state.abb)
names(temp) <- c("STATE", "state.abb")
temp %<>% as.data.frame()
ds172 %<>% left_join(temp, by=c("STATE"="state.name"))
ds172 %<>% mutate(
  district = paste0(state.abb, as.character(CDA))
)

# "pct_black", "pct_asian", "pct_other", "pct_hisp",
# "pct_male",
# "pct_age_10s", "pct_age_20s", "pct_age_30s", "pct_age_40s", "pct_age_50s", "pct_age_60s", "pct_age_70s", "pct_age_80s",
# "pct_suburban", "pct_urban"

age.vars <- names(ds172)[str_sub(names(ds172),1,3)=="H76"]
ds172 %<>% mutate_at(age.vars, as.integer)

ds172 %<>% mutate(
  pct_black = as.integer(H7X003)/as.integer(H7X001),
  pct_asian = as.integer(H7X005)/as.integer(H7X001),
  pct_other = as.integer(H7X007)/as.integer(H7X001),
  pct_hisp = as.integer(H7Y003)/as.integer(H7Y001),
  pct_male = as.integer(H76002)/as.integer(H76001),
  pct_age_10s = 
    (H76005+H76006+H76007+H76029+H76030+H76031)/H76001,
  pct_age_20s = 
    (H76008+H76009+H76010+H76011+H76032+H76033+H76034+H76035)/H76001,
  pct_age_30s = 
    (H76012+H76013+H76036+H76037)/H76001,
  pct_age_40s = 
    (H76014+H76015+H76038+H76039)/H76001,
  pct_age_50s = 
    (H76016+H76017+H76040+H76041)/H76001,
  pct_age_60s = 
    (H76018+H76019+H76020+H76021+H76042+H76043+H76044+H76045)/H76001,
  pct_age_70s = 
    (H76022+H76023+H76046+H76047)/H76001,
  pct_age_80s = 
    (H76024+H76025+H76048+H76049)/H76001,
  # For classifying suburban, see <https://www.census.gov/programs-surveys/geography/about/faq/2010-urban-area-faq.html#par_textimage_1>
  pct_suburban = as.integer(H7W004)/as.integer(H7W001),
  pct_urban = (as.integer(H7W002)+as.integer(H7W003))/as.integer(H7W001)
)

names(ds172)[str_sub(names(ds172), 1,4)=="pct_"]
cd_controls <- ds172 %>% filter(!is.na(state.abb)) %>% dplyr::select(district, starts_with("pct_"))

# "as.factor(income_dec)",
# "pct_hsgrad", "pct_somecollege", "pct_bach", "pct_postgrad",

ds175 %<>% left_join(temp, by=c("STATE"="state.name"))
ds175 %<>% mutate(
  district = paste0(state.abb, as.character(CDCURRA))
)

edu.vars <- names(ds175)[str_sub(names(ds175),1,3)=="IXM"]
ds175 %<>% mutate_at(edu.vars, as.integer)

ds175 %<>% mutate(
  income_dec = ntile(as.numeric(ds175$I25E001), 10),
  income_dec = as.factor(income_dec),
  pct_hsgrad = (IXME017+IXME018)/IXME001,
  pct_somecollege = (IXME019+IXME020+IXME021)/IXME001,
  pct_bach = IXME022/IXME001,
  pct_postgrad = (IXME023+IXME024+IXME025)/IXME001
)

temp2 <- ds175 %>% filter(!is.na(state.abb)) %>% dplyr::select(district, income_dec, starts_with("pct_"))

cd_controls %<>% left_join(temp2, by="district")





##### Step 4: regressions for Tables E.1 and E.2 #####



zcta_summ %<>% filter(!is.na(StateAbb) & !is.na(CD111))

zcta_summ %<>% mutate(
  district = paste0(StateAbb, ifelse(nchar(CD111)==1, paste0("0", CD111), CD111))
)

tea.party.cands$district %<>% as.character()
stopifnot(length(unique(tea.party.cands$district))==nrow(tea.party.cands))

zcta_summ %<>% left_join(tea.party.cands, by="district")
zcta_summ %<>% mutate(
  is_tea_party = ifelse(is.na(is_tea_party), FALSE, is_tea_party)
)

zcta_summ %<>% left_join(cd_controls, by="district")
zcta_summ %<>% mutate(
  state = as.factor(StateAbb)
)

zcta_summ$fox.ave.pos.popcount = -zcta_summ$fox.ave.pos.popcount

for(fox.measure in c("fox.density", "fox.ave.pos")) {

  for(weighting.method in c("popcount")) {
    
    base_controls <- c(
      paste0("num.channels.", weighting.method),
      paste0("num.bc.", weighting.method)
    )
    
    demog_controls <- c(
                        "pct_black", "pct_asian", "pct_other", "pct_hisp",
                        "pct_male",
                        "pct_age_10s", "pct_age_20s", "pct_age_30s", "pct_age_40s", "pct_age_50s", "pct_age_60s", "pct_age_70s", "pct_age_80s",
                        "income_dec",
                        "pct_hsgrad", "pct_somecollege", "pct_bach", "pct_postgrad",
                        "pct_suburban", "pct_urban"
    )
    
    x <- paste0(fox.measure, ".", weighting.method)
    
    print(paste0("-----baseline reg for ", fox.measure, ".", weighting.method, "-----"))
    f_base <- 
      fixest_form(y="as.integer(is_tea_party)",
                x=c(x, base_controls))
    m_base <- feols(f_base, data=zcta_summ)
    
    print(paste0("-----reg w controls for ", fox.measure, ".", weighting.method, "-----"))    
    f_controls <-       
      fixest_form(y="as.integer(is_tea_party)",
                x=c(x, base_controls, demog_controls))
    m_controls <- feols(f_controls, data=zcta_summ)
    
    print(paste0("-----reg w controls and state fe for ", fox.measure, ".", weighting.method, "-----"))    
    f_controls_statefe <- 
      fixest_form(y="as.integer(is_tea_party)",
                x=c(x, base_controls, demog_controls), fe="state")
    m_controls_statefe <- feols(f_controls_statefe, data=zcta_summ)   
    
    
    title <- ifelse(fox.measure=="fox.density",
                    "No Evidence of Strategic Entry of Tea Party Candidates Based on Congressional District-Level Fox News Density",
                    "No Evidence of Strategic Entry of Tea Party Candidates Based on Congressional District-Level Weighted Average Fox News Channel Position")
    stub <- ifelse(fox.measure=="fox.density", "fox_density", "ave_chpos")
    label = paste0("table:supply-side-", stub)
    
    if(length(m_base$obs_selection)==0) {
      mean_y1 <- mean(as.integer(zcta_summ$is_tea_party), na.rm = TRUE)
    } else {
      mean_y1 <- mean(as.integer(zcta_summ$is_tea_party[setdiff(1:nrow(zcta_summ), -m_base$obs_selection$obsRemoved)]), na.rm = TRUE)
    }
    mean_y2 <- mean(as.integer(zcta_summ$is_tea_party[setdiff(1:nrow(zcta_summ), -m_controls$obs_selection$obsRemoved)]), na.rm = TRUE)
    mean_y3 <- mean(as.integer(zcta_summ$is_tea_party[setdiff(1:nrow(zcta_summ), -m_controls_statefe$obs_selection$obsRemoved)]), na.rm = TRUE)
    
    # Print Tables E.1 and E.2
    print(etable(m_base, m_controls, m_controls_statefe,
           extralines = list("__Mean of DV" = c(sprintf("%.3f", mean_y1), sprintf("%.3f", mean_y2), sprintf("%.3f", mean_y3))),
           title = title,
           cluster=~state,
           drop = "(Constant)",
           group=list("\\midrule Cable system controls"=c(base_controls),
                      "Demographic controls"=c(demog_controls)),
           label = label,
           digits=3,
           digits.stats=2,
           style.tex=tablestyle,
           replace = T,
           placement = "H"
    ))
    
  }
  
}


